#include "tokenizer/qwen2_tokenizer.h"

#include <gtest/gtest.h>
#include <string>
#include <iostream>
#include <locale>
#include <codecvt>

using namespace pa;

// Round-trip check for Qwen2Tokenizer::unicode_nfc_normalize: the UTF-8 prompt
// is normalized into a UTF-32 string, re-encoded as UTF-8, and expected to be
// byte-for-byte equal to the original prompt.
TEST(Qwen2TokenizerTest, unicode_nfc_normalize) {
    // Converter used to turn the UTF-32 result back into UTF-8 bytes.
    using Utf32ToUtf8 = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>;

    std::string utf8_input = u8"你好，世界！";

    Qwen2Tokenizer tok;
    const std::u32string nfc_code_points = tok.unicode_nfc_normalize(utf8_input);

    Utf32ToUtf8 utf8_codec;
    const std::string round_tripped = utf8_codec.to_bytes(nfc_code_points);

    // The test expects normalization to leave this prompt unchanged.
    EXPECT_EQ(round_tripped, utf8_input);
}